#' Build the per-event cost table for the "cnr_data" sample
#'
#' Takes the rows of `dt_original_fir` whose sample source is `"cnr_data"` and
#' whose index date differs from the date of death (or whose date of death is
#' missing), pulls a cost table from the database, stacks it with
#' `dt_original_4th` and `dt_original_5th`, and left-joins the stacked cost
#' events onto the sample rows by `id_var` and `S_index_date_XX`.
#'
#' The function takes no arguments. `dt_original_fir`, `dt_original_4th`,
#' `dt_original_5th` and `con` are not defined here; they are resolved from the
#' enclosing environment and must exist before the call.
#'
#' @return A data.table with one row per (sample row, cost event) pair; sample
#'   rows without any matching cost event are kept with `NA` cost columns.
#'   Columns: `id_var`, `S_index_date_XX`, `S_sample_source_XX`,
#'   `DMG_date_of_death_XX` (Date), `DMG_died_within_365d` (factor),
#'   `category`, `profession`, `main_cat`, `amount`, `actual_cost`,
#'   `cost_date`, `event_date_end`.
get_cost_data_cancer <- function() {

  # Parse a date-like vector with fasttime and reduce it to class Date.
  parse_as_date <- function(x) {
    as.Date(fasttime::fastPOSIXct(x))
  }

  # Assemble a Date from separate year / month / day columns.
  date_from_ymd <- function(year, month, day) {
    parse_as_date(
      paste(as.character(year), as.character(month), as.character(day),
            sep = "-")
    )
  }

  ## add death date and index date to cost dt ----------------------------------
  # Keep only the cnr_data sample and drop rows whose index date equals the
  # date of death; rows with a missing date of death are kept.
  temp <- data.table::copy(
    dt_original_fir[S_sample_source_XX == 'cnr_data' &
                      (S_index_date_XX != DMG_date_of_death_XX |
                         is.na(DMG_date_of_death_XX))]
  )

  # convert S_index_date_XX to be of class Date for future merges
  temp[, S_index_date_XX := parse_as_date(S_index_date_XX)]

  # check that the sample table is identified by the trio
  # {ID, index_date, sample}; anyDuplicated() avoids materialising a
  # de-duplicated copy just to compare row counts
  key_cols <- c("id_var", "S_index_date_XX", "S_sample_source_XX")
  if (anyDuplicated(temp, by = key_cols) > 0L) {
    stop("Problem with identifing unique observation in dt_org_plus")
  }

  print("temp feature build ")

  ## SQL query - 3rd table (dt_original_3rd) -----------------------------------
  # NOTE(review): the query text is the literal "sql_query", which looks like a
  # placeholder -- confirm the real statement is substituted before running.
  query_start <- Sys.time()
  dt_original_3rd <- data.table::as.data.table(
    DBI::dbGetQuery(
      con,
      "sql_query"
    )
  )
  query_end <- Sys.time()
  print("Third table extraction took (in mins):")
  # Force minutes so the printed value matches the label above; a bare
  # subtraction lets difftime pick secs / mins / hours automatically.
  print(difftime(query_end, query_start, units = "mins"))
  print("Third table extraction finished.")

  ## cost events: stack the three source tables --------------------------------
  dt_cost_by_mainCat_2y <- rbind(
    dt_original_3rd,
    dt_original_4th,
    dt_original_5th
  )
  dt_cost_by_mainCat_2y[, main_cat := factor(main_cat)]
  dt_cost_by_mainCat_2y[, category := factor(category)]
  dt_cost_by_mainCat_2y[, profession := factor(profession)]
  dt_cost_by_mainCat_2y[, cost_date := date_from_ymd(event_date_year,
                                                     event_date_month,
                                                     event_date_day)]

  # Due to some problems in the database, event_length_days is sometimes
  # negative, which isn't possible; such values are overwritten with 1.
  # NOTE(review): the previous comment said "changed to 0" while the code set
  # 1. The code's value is kept unchanged here -- confirm which is intended.
  dt_cost_by_mainCat_2y[event_length_days < 0, event_length_days := 1]
  dt_cost_by_mainCat_2y[, event_date_end := cost_date + event_length_days]
  dt_cost_by_mainCat_2y[, S_index_date_XX := date_from_ymd(S_index_date_XX_year,
                                                           S_index_date_XX_month,
                                                           S_index_date_XX_day)]

  ## left join: every sample row is kept, cost events attached where present ---
  # temp already contains only cnr_data rows, so no further row filter is
  # needed on the x side.
  dt_cost_final <- merge(
    x = temp[, .(id_var, S_index_date_XX, S_sample_source_XX,
                 DMG_date_of_death_XX, DMG_died_within_365d)],
    y = dt_cost_by_mainCat_2y[, .(id_var, S_index_date_XX, category,
                                  profession, main_cat, amount, actual_cost,
                                  cost_date, event_date_end)],
    by = c("id_var", "S_index_date_XX"),
    all.x = TRUE,
    all.y = FALSE
  )

  dt_cost_final[, DMG_date_of_death_XX := parse_as_date(DMG_date_of_death_XX)]
  dt_cost_final[, DMG_died_within_365d := factor(DMG_died_within_365d)]

  print("extraction ended ")

  return(dt_cost_final)

}